[#792] Add support for loading of CSV data into tables - partial commit

This commit is contained in:
Lukas Eder 2011-08-22 22:29:28 +00:00
parent a35e9466ed
commit 280e5dd2a3
14 changed files with 1990 additions and 0 deletions

View File

@ -94,6 +94,7 @@ import org.jooq.EnumType;
import org.jooq.Field;
import org.jooq.Insert;
import org.jooq.InsertQuery;
import org.jooq.Loader;
import org.jooq.MasterDataType;
import org.jooq.MergeFinalStep;
import org.jooq.QueryPart;
@ -6554,6 +6555,156 @@ public abstract class jOOQAbstractTest<
create().select(TBook_TITLE(), TBook_ID()).from(TBook()).fetch()));
}
/**
 * Integration test for the CSV part of the <code>Loader</code> API.
 * <p>
 * The scenarios below run against the author table in sequence and build on
 * each other's database state, so their order matters.
 */
@Test
public void testLoader() throws Exception {
    final Field<Integer> rowCount = create().count();
    int authors;

    // Scenario 1: an empty CSV source
    // Nothing is processed, stored or ignored; the table is unchanged.
    final Loader<A> emptyLoad =
        create().loadInto(TAuthor())
                .loadCSV("")
                .fields(TAuthor_ID())
                .execute();

    assertEquals(0, emptyLoad.processed());
    assertEquals(0, emptyLoad.errors().size());
    assertEquals(0, emptyLoad.stored());
    assertEquals(0, emptyLoad.ignored());

    authors = (int) create().select(rowCount).from(TAuthor()).fetchOne(rowCount);
    assertEquals(2, authors);

    // Scenario 2: constraint violation (LAST_NAME is NOT NULL)
    // No error option is specified, so the load stops at the first error.
    final Loader<A> abortedOnNull =
        create().loadInto(TAuthor())
                .loadCSV(
                    "3\n" +
                    "4")
                .fields(TAuthor_ID())
                .ignoreRows(0)
                .execute();

    assertEquals(1, abortedOnNull.processed());
    assertEquals(1, abortedOnNull.errors().size());
    assertNotNull(abortedOnNull.errors().get(0));
    assertEquals(0, abortedOnNull.stored());
    assertEquals(1, abortedOnNull.ignored());

    authors = (int) create().select(rowCount).from(TAuthor()).fetchOne(rowCount);
    assertEquals(2, authors);

    // Scenario 3: same constraint violation (LAST_NAME is NOT NULL)
    // Errors are ignored, so both rows are processed and both fail.
    final Loader<A> ignoredOnNull =
        create().loadInto(TAuthor())
                .onErrorIgnore()
                .loadCSV(
                    "3\n" +
                    "4")
                .fields(TAuthor_ID())
                .ignoreRows(0)
                .execute();

    assertEquals(2, ignoredOnNull.processed());
    assertEquals(2, ignoredOnNull.errors().size());
    assertNotNull(ignoredOnNull.errors().get(0));
    assertNotNull(ignoredOnNull.errors().get(1));
    assertEquals(0, ignoredOnNull.stored());
    assertEquals(2, ignoredOnNull.ignored());

    authors = (int) create().select(rowCount).from(TAuthor()).fetchOne(rowCount);
    assertEquals(2, authors);

    // Scenario 4: duplicate records with onDuplicateKeyError / onErrorAbort
    // Uses a custom quote (') and separator (;). The load stops at the
    // first duplicate.
    final Loader<A> abortedOnDuplicate =
        create().loadInto(TAuthor())
                .onDuplicateKeyError()
                .onErrorAbort()
                .loadCSV(
                    "1;'Kafka'\n" +
                    "2;Frisch")
                .fields(TAuthor_ID(), TAuthor_LAST_NAME())
                .quote('\'')
                .separator(';')
                .ignoreRows(0)
                .execute();

    assertEquals(1, abortedOnDuplicate.processed());
    assertEquals(1, abortedOnDuplicate.errors().size());
    assertNotNull(abortedOnDuplicate.errors().get(0));
    assertEquals(0, abortedOnDuplicate.stored());
    assertEquals(1, abortedOnDuplicate.ignored());

    authors = (int) create().select(rowCount).from(TAuthor()).fetchOne(rowCount);
    assertEquals(2, authors);

    // Scenario 5: duplicate records with onDuplicateKeyIgnore
    // Both rows are skipped without being reported as errors.
    final Loader<A> skippedDuplicates =
        create().loadInto(TAuthor())
                .onDuplicateKeyIgnore()
                .onErrorAbort()
                .loadCSV(
                    "1,\"Kafka\"\n" +
                    "2,Frisch")
                .fields(TAuthor_ID(), TAuthor_LAST_NAME())
                .ignoreRows(0)
                .execute();

    assertEquals(2, skippedDuplicates.processed());
    assertEquals(0, skippedDuplicates.errors().size());
    assertEquals(2, skippedDuplicates.ignored());

    authors = (int) create().select(rowCount).from(TAuthor()).fetchOne(rowCount);
    assertEquals(2, authors);

    // Scenario 6: two valid records
    // The first two CSV lines (a remark and the header) are skipped
    // explicitly; line terminators are mixed (\n and \r).
    final Loader<A> twoRecords =
        create().loadInto(TAuthor())
                .loadCSV(
                    "####Some Data####\n" +
                    "\"ID\",\"Last Name\"\r" +
                    "3,Hesse\n" +
                    "4,Frisch")
                .fields(TAuthor_ID(), TAuthor_LAST_NAME())
                .quote('"')
                .separator(',')
                .ignoreRows(2)
                .execute();

    assertEquals(2, twoRecords.processed());
    assertEquals(2, twoRecords.stored());
    assertEquals(0, twoRecords.ignored());
    assertEquals(0, twoRecords.errors().size());

    authors = (int) create().select(rowCount)
                            .from(TAuthor())
                            .where(TAuthor_ID().in(3, 4))
                            .and(TAuthor_LAST_NAME().in("Hesse", "Frisch"))
                            .fetchOne(rowCount);
    assertEquals(2, authors);

    // Scenario 7: two valid records, one CSV column not loaded
    // A null entry in fields() skips the "First Name" column; the header
    // row is skipped without an explicit ignoreRows() call.
    final Loader<A> skippedColumn =
        create().loadInto(TAuthor())
                .loadCSV(
                    "\"ID\",\"First Name\",\"Last Name\"\r" +
                    "5,Hermann,Hesse\n" +
                    "6,\"Max\",Frisch")
                .fields(TAuthor_ID(), null, TAuthor_LAST_NAME())
                .execute();

    assertEquals(2, skippedColumn.processed());
    assertEquals(2, skippedColumn.stored());
    assertEquals(0, skippedColumn.ignored());
    assertEquals(0, skippedColumn.errors().size());

    authors = (int) create().select(rowCount)
                            .from(TAuthor())
                            .where(TAuthor_ID().in(5, 6))
                            .and(TAuthor_LAST_NAME().in("Hesse", "Frisch"))
                            .fetchOne(rowCount);
    assertEquals(2, authors);

    // TODO Add commit / rollback tests
    // TODO Add onDuplicateKeyUpdate tests
}
/**
* Reflection helper
*/

View File

@ -0,0 +1,70 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq;
import java.util.List;
/**
* The <code>Loader</code> API is used for configuring data loads.
* <p>
* This type is the final type of that API. It holds information about the
* outcome of the data load: the number of processed, stored and ignored rows,
* as well as the errors that were collected while loading.
*
* @param <R> The record type of the table that data was loaded into
* @author Lukas Eder
*/
public interface Loader<R extends TableRecord<R>> {
/**
* A list of errors that might have happened during the load.
*
* @return The errors collected during the load
*/
List<LoaderError> errors();
/**
* The number of processed rows
*
* @return The number of processed rows
*/
int processed();
/**
* The number of ignored rows
* <p>
* NOTE(review): the integration test for this API expects rows that caused
* an error to be counted here as well - confirm against the implementation.
*
* @return The number of ignored rows
*/
int ignored();
/**
* The number of inserted or updated rows
*
* @return The number of inserted or updated rows
*/
int stored();
}

View File

@ -0,0 +1,68 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq;
/**
* The <code>Loader</code> API is used for configuring data loads.
* <p>
* The step in constructing the {@link Loader} object where you can set the
* optional CSV loader options. As this step extends {@link LoaderLoadStep},
* the load can be executed directly from here.
*
* @param <R> The record type of the table that data is loaded into
* @author Lukas Eder
*/
public interface LoaderCSVOptionsStep<R extends TableRecord<R>> extends LoaderLoadStep<R> {
/**
* Specify that a certain number of rows should be ignored from the CSV
* file. This is useful for skipping leading rows that do not hold data,
* such as header rows or processing information.
* <p>
* By default, this is set to <code>1</code>, as CSV files are expected to
* hold a header row.
*
* @param number The number of rows to ignore.
* @return This step, for further CSV options or for executing the load
*/
LoaderCSVOptionsStep<R> ignoreRows(int number);
/**
* Specify the quote character. By default, this is <code>"</code>
*
* @param quote The quote character
* @return This step, for further CSV options or for executing the load
*/
LoaderCSVOptionsStep<R> quote(char quote);
/**
* Specify the separator character. By default, this is <code>,</code>
*
* @param separator The character separating columns within a row
* @return This step, for further CSV options or for executing the load
*/
LoaderCSVOptionsStep<R> separator(char separator);
}

View File

@ -0,0 +1,67 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq;
import java.util.Collection;
/**
* The <code>Loader</code> API is used for configuring data loads.
* <p>
* The step in constructing the {@link Loader} object where you can set the
* mandatory CSV loader options.
*
* @param <R> The record type of the table that data is loaded into
* @author Lukas Eder
*/
public interface LoaderCSVStep<R extends TableRecord<R>> {
/**
* Specify the fields to be loaded into the table in the correct order.
* The CSV column at index <code>i</code> is inserted into the table field
* at index <code>i</code>. If <code>fields[i] == null</code>, then the CSV
* column is skipped.
*
* @param fields The target fields, one per CSV column; <code>null</code>
*            entries mark CSV columns that are not loaded
* @return The next step, where optional CSV options can be set
*/
LoaderCSVOptionsStep<R> fields(Field<?>... fields);
/**
* Specify the fields to be loaded into the table in the correct order.
* The CSV column at index <code>i</code> is inserted into the table field
* at index <code>i</code>. If
* <code>new ArrayList(fields).get(i) == null</code>, then the CSV column is
* skipped.
*
* @param fields The target fields, one per CSV column in iteration order;
*            <code>null</code> entries mark CSV columns that are not loaded
* @return The next step, where optional CSV options can be set
*/
LoaderCSVOptionsStep<R> fields(Collection<? extends Field<?>> fields);
}

View File

@ -0,0 +1,69 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq;
import java.io.IOException;
import java.sql.SQLException;
/**
* An error that occurred during loading. Errors are only handled when they were
* caused by {@link SQLException}'s. {@link IOException}'s and other problems
* will abort loading fatally.
* <p>
* Instances are made available through {@link Loader#errors()}.
*
* @author Lukas Eder
*/
public interface LoaderError {
/**
* The underlying {@link SQLException} that caused the error
*
* @return The exception that caused the error
*/
SQLException exception();
/**
* The processed row index starting with <code>0</code> that caused the error
*
* @return The zero-based index of the row that caused the error
*/
int rowIndex();
/**
* The row data that caused the error
*
* @return The values of the row that caused the error
*/
String[] row();
/**
* The query whose execution failed
*
* @return The query whose execution failed
*/
Query query();
}

View File

@ -0,0 +1,56 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq;
import java.io.IOException;
/**
* The <code>Loader</code> API is used for configuring data loads.
* <p>
* The step in constructing the {@link Loader} object where you can execute the
* load.
*
* @param <R> The record type of the table that data is loaded into
* @author Lukas Eder
*/
public interface LoaderLoadStep<R extends TableRecord<R>> {
/**
* Execute the load.
* <p>
* As documented on {@link LoaderError}, only errors caused by
* {@link java.sql.SQLException}'s are handled by the loader. They are
* wrapped in the resulting <code>Loader</code> object and can be accessed
* using {@link Loader#errors()}. {@link IOException}'s and other problems
* abort loading fatally.
*
* @return The <code>Loader</code> object describing the outcome of the load
* @throws IOException Declared by this method; NOTE(review): presumably
*             raised when the data source cannot be read - confirm against
*             the implementation
*/
Loader<R> execute() throws IOException;
}

View File

@ -0,0 +1,177 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq;
/**
* The <code>Loader</code> API is used for configuring data loads.
* <p>
* The step in constructing the {@link Loader} object where you can add
* options for the loading behaviour: handling of duplicate keys, handling of
* errors, and commit behaviour. As this step extends
* {@link LoaderSourceStep}, the data source can be specified directly from
* here.
*
* @param <R> The record type of the table that data is loaded into
* @author Lukas Eder
*/
public interface LoaderOptionsStep<R extends TableRecord<R>> extends LoaderSourceStep<R> {
/**
* Instruct the <code>Loader</code> to update duplicate records if the main
* unique key's value is already in the database. This is only supported if
* {@link InsertQuery#onDuplicateKeyUpdate(boolean)} is supported, too.
* <p>
* If the loaded table does not have a main key, then all records are
* inserted and this clause behaves like {@link #onDuplicateKeyIgnore()}
* <p>
* If you don't specify a behaviour, {@link #onDuplicateKeyError()} will be
* the default. This cannot be combined with {@link #onDuplicateKeyError()}
* or {@link #onDuplicateKeyIgnore()}
*
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> onDuplicateKeyUpdate();
/**
* Instruct the <code>Loader</code> to skip duplicate records if the main
* unique key's value is already in the database. This is only supported if
* {@link InsertQuery#onDuplicateKeyUpdate(boolean)} is supported, too.
* <p>
* If the loaded table does not have a main key, then all records are
* inserted.
* <p>
* If you don't specify a behaviour, {@link #onDuplicateKeyError()} will be
* the default. This cannot be combined with {@link #onDuplicateKeyError()}
* or {@link #onDuplicateKeyUpdate()}
*
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> onDuplicateKeyIgnore();
/**
* Instruct the <code>Loader</code> to cause an error in loading if there
* are any duplicate records.
* <p>
* If this is combined with {@link #onErrorAbort()} and {@link #commitAll()},
* then loading is rolled back on abort.
* <p>
* If you don't specify a behaviour, this will be
* the default. This cannot be combined with {@link #onDuplicateKeyIgnore()}
* or {@link #onDuplicateKeyUpdate()}
*
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> onDuplicateKeyError();
/**
* Instruct the <code>Loader</code> to ignore any errors that might occur
* when inserting a record. The <code>Loader</code> will then skip the
* record and try inserting the next one. After loading, you can access
* errors with {@link Loader#errors()}
* <p>
* If you don't specify a behaviour, {@link #onErrorAbort()} will be the
* default. This cannot be combined with {@link #onErrorAbort()}
*
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> onErrorIgnore();
/**
* Instruct the <code>Loader</code> to abort loading after the first error
* that might occur when inserting a record. After loading, you can access
* errors with {@link Loader#errors()}
* <p>
* If this is combined with {@link #commitAll()}, then loading is rolled
* back on abort.
* <p>
* If you don't specify a behaviour, this will be the default. This cannot
* be combined with {@link #onErrorIgnore()}
*
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> onErrorAbort();
/**
* Commit each loaded record. This will prevent batch <code>INSERT</code>'s
* altogether. Otherwise, this is the same as calling
* {@link #commitAfter(int)} with <code>1</code> as parameter.
* <p>
* With this clause, errors will never result in a rollback, even when you
* specify {@link #onDuplicateKeyError()} or {@link #onErrorAbort()}
* <p>
* The COMMIT OPTIONS might be useful for fine-tuning performance behaviour
* in some RDBMS, where large commits lead to a high level of concurrency in
* the database.
* <p>
* If you don't specify a COMMIT OPTION, {@link #commitNone()} will be the
* default, leaving transaction handling up to you.
*
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> commitEach();
/**
* Commit after a certain number of inserted records. This may enable batch
* <code>INSERT</code>'s for at most <code>number</code> records.
* <p>
* With this clause, errors will never result in a rollback, even when you
* specify {@link #onDuplicateKeyError()} or {@link #onErrorAbort()}
* <p>
* The COMMIT OPTIONS might be useful for fine-tuning performance behaviour
* in some RDBMS, where large commits lead to a high level of concurrency in
* the database.
* <p>
* If you don't specify a COMMIT OPTION, {@link #commitNone()} will be the
* default, leaving transaction handling up to you.
*
* @param number The number of records that are committed together.
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> commitAfter(int number);
/**
* Commit only after inserting all records. If this is used together with
* {@link #onDuplicateKeyError()} or {@link #onErrorAbort()}, an abort will
* result in a rollback of previously loaded records.
* <p>
* The COMMIT OPTIONS might be useful for fine-tuning performance behaviour
* in some RDBMS, where large commits lead to a high level of concurrency in
* the database.
* <p>
* If you don't specify a COMMIT OPTION, {@link #commitNone()} will be the
* default, leaving transaction handling up to you.
*
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> commitAll();
/**
* Leave committing / rolling back up to client code.
* <p>
* The COMMIT OPTIONS might be useful for fine-tuning performance behaviour
* in some RDBMS, where large commits lead to a high level of concurrency in
* the database.
* <p>
* If you don't specify a COMMIT OPTION, this will be the default, leaving
* transaction handling up to you.
*
* @return This step, for further options or for specifying the data source
*/
LoaderOptionsStep<R> commitNone();
}

View File

@ -0,0 +1,99 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.Reader;
import org.xml.sax.InputSource;
/**
* The <code>Loader</code> API is used for configuring data loads.
* <p>
* The step in constructing the {@link Loader} object where you can specify the
* load type (CSV or XML) and data source.
*
* @param <R> The record type of the table that data is loaded into
* @author Lukas Eder
*/
public interface LoaderSourceStep<R extends TableRecord<R>> {
/**
* Load CSV data from a file
*
* @param file The file holding the CSV data
* @return The next step, where the mandatory CSV options are set
* @throws FileNotFoundException Declared by this method; NOTE(review):
*             presumably raised when the file cannot be opened - confirm
*             against the implementation
*/
LoaderCSVStep<R> loadCSV(File file) throws FileNotFoundException;
/**
* Load CSV data from a string
*
* @param data The CSV data itself
* @return The next step, where the mandatory CSV options are set
*/
LoaderCSVStep<R> loadCSV(String data);
/**
* Load CSV data from an input stream
*
* @param stream The stream to read the CSV data from
* @return The next step, where the mandatory CSV options are set
*/
LoaderCSVStep<R> loadCSV(InputStream stream);
/**
* Load CSV data from a reader
*
* @param reader The reader to read the CSV data from
* @return The next step, where the mandatory CSV options are set
*/
LoaderCSVStep<R> loadCSV(Reader reader);
/**
* Load XML data from a file
*
* @param file The file holding the XML data
* @return The next step of the XML loader API
* @throws FileNotFoundException Declared by this method; NOTE(review):
*             presumably raised when the file cannot be opened - confirm
*             against the implementation
*/
LoaderXMLStep<R> loadXML(File file) throws FileNotFoundException;
/**
* Load XML data from a string
*
* @param data The XML data itself
* @return The next step of the XML loader API
*/
LoaderXMLStep<R> loadXML(String data);
/**
* Load XML data from an input stream
*
* @param stream The stream to read the XML data from
* @return The next step of the XML loader API
*/
LoaderXMLStep<R> loadXML(InputStream stream);
/**
* Load XML data from a reader
*
* @param reader The reader to read the XML data from
* @return The next step of the XML loader API
*/
LoaderXMLStep<R> loadXML(Reader reader);
/**
* Load XML data from a SAX input source
*
* @param source The SAX input source to read the XML data from
* @return The next step of the XML loader API
*/
LoaderXMLStep<R> loadXML(InputSource source);
}

View File

@ -0,0 +1,50 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq;
/**
* The <code>Loader</code> API is used for configuring data loads.
* <p>
* The step in constructing the {@link Loader} object where you can set the
* mandatory XML loader options.
* <p>
* This interface is currently a placeholder: it declares no methods yet.
*
* @param <R> The record type of the table that data is loaded into
* @author Lukas Eder
*/
public interface LoaderXMLStep<R extends TableRecord<R>> {
// [...] This API is not yet defined
}

View File

@ -70,6 +70,7 @@ import org.jooq.InsertQuery;
import org.jooq.InsertSelectQuery;
import org.jooq.InsertSetStep;
import org.jooq.InsertValuesStep;
import org.jooq.LoaderOptionsStep;
import org.jooq.MergeUsingStep;
import org.jooq.Query;
import org.jooq.QueryPart;
@ -198,6 +199,18 @@ public class Factory implements Configuration {
return mapping;
}
// -------------------------------------------------------------------------
// Access to the loader API
// -------------------------------------------------------------------------
/**
 * Entry point of the <code>Loader</code> API: start configuring a load of
 * data from a CSV or XML source into a table.
 *
 * @param table The table to load data into
 * @return The first step of the <code>Loader</code> API, backed by a new
 *         loader bound to this factory and the given table
 */
public final <R extends TableRecord<R>> LoaderOptionsStep<R> loadInto(Table<R> table) {
    final LoaderOptionsStep<R> loader = new LoaderImpl<R>(this, table);
    return loader;
}
// -------------------------------------------------------------------------
// RenderContext and BindContext accessors
// -------------------------------------------------------------------------

View File

@ -0,0 +1,79 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq.impl;
import java.sql.SQLException;
import org.jooq.LoaderError;
import org.jooq.Query;
/**
 * Default implementation of {@link LoaderError}: an immutable holder for the
 * exception, the row and the query involved in a failed load operation.
 * <p>
 * The row data is copied on the way in and on the way out, so that neither
 * the code creating this error nor the code inspecting it can modify the
 * recorded row afterwards.
 *
 * @author Lukas Eder
 */
class LoaderErrorImpl implements LoaderError {

    private final SQLException exception;
    private final int          rowIndex;
    private final String[]     row;
    private final Query        query;

    /**
     * @param exception The exception that caused the error
     * @param row The row data that caused the error; copied, may be
     *            <code>null</code>
     * @param rowIndex The index of the row that caused the error
     * @param query The query whose execution failed
     */
    LoaderErrorImpl(SQLException exception, String[] row, int rowIndex, Query query) {
        this.exception = exception;
        this.row = copyOf(row);
        this.rowIndex = rowIndex;
        this.query = query;
    }

    @Override
    public SQLException exception() {
        return exception;
    }

    @Override
    public int rowIndex() {
        return rowIndex;
    }

    /**
     * @return A copy of the recorded row data, or <code>null</code> if no row
     *         data was recorded
     */
    @Override
    public String[] row() {
        return copyOf(row);
    }

    @Override
    public Query query() {
        return query;
    }

    /**
     * Null-safe shallow copy of a row. Strings are immutable, so a shallow
     * copy is sufficient to protect the recorded data.
     */
    private static String[] copyOf(String[] source) {
        return source == null ? null : source.clone();
    }
}

View File

@ -0,0 +1,465 @@
/**
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq.impl;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.jooq.Condition;
import org.jooq.Field;
import org.jooq.InsertQuery;
import org.jooq.Loader;
import org.jooq.LoaderCSVOptionsStep;
import org.jooq.LoaderCSVStep;
import org.jooq.LoaderError;
import org.jooq.LoaderOptionsStep;
import org.jooq.LoaderXMLStep;
import org.jooq.SimpleSelectQuery;
import org.jooq.Table;
import org.jooq.TableRecord;
import org.jooq.UpdatableTable;
import org.jooq.tools.csv.CSVParser;
import org.jooq.tools.csv.CSVReader;
import org.xml.sax.InputSource;
/**
 * Implementation of the cascading loader API. One single instance implements
 * every step interface of the fluent API, which is why all configuration
 * methods simply return <code>this</code>.
 * <p>
 * Only CSV content can be loaded at the moment. All <code>loadXML()</code>
 * methods throw an {@link UnsupportedOperationException}.
 *
 * @author Lukas Eder
 */
class LoaderImpl<R extends TableRecord<R>> implements

    // Cascading interface implementations for Loader behaviour
    LoaderOptionsStep<R>,
    LoaderXMLStep<R>,
    LoaderCSVStep<R>,
    LoaderCSVOptionsStep<R>,
    Loader<R> {

    // Configuration constants
    // -----------------------
    private static final int        ON_DUPLICATE_KEY_ERROR  = 0;
    private static final int        ON_DUPLICATE_KEY_IGNORE = 1;
    private static final int        ON_DUPLICATE_KEY_UPDATE = 2;

    private static final int        ON_ERROR_ABORT          = 0;
    private static final int        ON_ERROR_IGNORE         = 1;

    private static final int        COMMIT_NONE             = 0;
    private static final int        COMMIT_AFTER            = 1;
    private static final int        COMMIT_ALL              = 2;

    private static final int        CONTENT_CSV             = 0;
    private static final int        CONTENT_XML             = 1;

    // Configuration data
    // ------------------
    private final Factory           create;
    private final Table<R>          table;

    /** The target table cast to {@link UpdatableTable}, or null if it is not updatable. */
    private final UpdatableTable<R> updatable;
    private int                     onDuplicate             = ON_DUPLICATE_KEY_ERROR;
    private int                     onError                 = ON_ERROR_ABORT;
    private int                     commit                  = COMMIT_NONE;
    private int                     commitAfter             = 1;
    private int                     content                 = CONTENT_CSV;
    private BufferedReader          data;

    // CSV configuration data
    // ----------------------

    /** Number of leading lines to skip. The default of 1 skips a header line. */
    private int                     ignoreRows              = 1;
    private char                    quote                   = CSVParser.DEFAULT_QUOTE_CHARACTER;
    private char                    separator               = CSVParser.DEFAULT_SEPARATOR;

    /** Target field per CSV column. A null entry means the column is not loaded. */
    private Field<?>[]              fields;

    /** Parallel to {@link #fields}: true if that field is part of the table's main key. */
    private boolean[]               mainKey;

    // Result data
    // -----------
    private int                     ignored;
    private int                     processed;
    private int                     stored;
    private final List<LoaderError> errors;

    /**
     * @param create The factory used to create and execute queries
     * @param table The table to load data into
     */
    LoaderImpl(Factory create, Table<R> table) {
        this.create = create;
        this.table = table;
        this.errors = new ArrayList<LoaderError>();

        if (table instanceof UpdatableTable) {
            this.updatable = (UpdatableTable<R>) table;
        }
        else {
            this.updatable = null;
        }
    }

    // -------------------------------------------------------------------------
    // Configuration setup
    // -------------------------------------------------------------------------

    @Override
    public final LoaderImpl<R> onDuplicateKeyError() {
        onDuplicate = ON_DUPLICATE_KEY_ERROR;
        return this;
    }

    /**
     * @throws IllegalStateException if the target table is not an
     *             {@link UpdatableTable}
     */
    @Override
    public final LoaderImpl<R> onDuplicateKeyIgnore() {
        if (updatable == null) {
            throw new IllegalStateException("ON DUPLICATE KEY IGNORE only works on UpdatableTable. Table is not updatable : " + table);
        }

        onDuplicate = ON_DUPLICATE_KEY_IGNORE;
        return this;
    }

    /**
     * @throws IllegalStateException if the target table is not an
     *             {@link UpdatableTable}
     */
    @Override
    public final LoaderImpl<R> onDuplicateKeyUpdate() {
        if (updatable == null) {
            throw new IllegalStateException("ON DUPLICATE KEY UPDATE only works on UpdatableTable. Table is not updatable : " + table);
        }

        onDuplicate = ON_DUPLICATE_KEY_UPDATE;
        return this;
    }

    @Override
    public final LoaderImpl<R> onErrorIgnore() {
        onError = ON_ERROR_IGNORE;
        return this;
    }

    @Override
    public final LoaderImpl<R> onErrorAbort() {
        onError = ON_ERROR_ABORT;
        return this;
    }

    /**
     * Switch to "commit after N rows" mode without touching the configured
     * number of rows (1 unless {@link #commitAfter(int)} was called before).
     */
    @Override
    public final LoaderImpl<R> commitEach() {
        commit = COMMIT_AFTER;
        return this;
    }

    /**
     * @param number The number of processed rows after which to commit
     * @throws IllegalArgumentException if <code>number</code> is smaller than
     *             1. A value of 0 would otherwise only fail with an
     *             {@link ArithmeticException} in the middle of loading
     */
    @Override
    public final LoaderImpl<R> commitAfter(int number) {
        if (number < 1) {
            throw new IllegalArgumentException("The number of rows to commit after must be at least 1 : " + number);
        }

        commit = COMMIT_AFTER;
        commitAfter = number;
        return this;
    }

    @Override
    public final LoaderImpl<R> commitAll() {
        commit = COMMIT_ALL;
        return this;
    }

    @Override
    public final LoaderImpl<R> commitNone() {
        commit = COMMIT_NONE;
        return this;
    }

    /**
     * Load CSV content from a file. The file is read with {@link FileReader},
     * i.e. using the platform's default character encoding.
     */
    @Override
    public final LoaderImpl<R> loadCSV(File file) throws FileNotFoundException {
        content = CONTENT_CSV;
        data = new BufferedReader(new FileReader(file));
        return this;
    }

    @Override
    public final LoaderImpl<R> loadCSV(String csv) {
        content = CONTENT_CSV;
        data = new BufferedReader(new StringReader(csv));
        return this;
    }

    /**
     * Load CSV content from a stream. The stream is decoded using the
     * platform's default character encoding.
     */
    @Override
    public final LoaderImpl<R> loadCSV(InputStream stream) {
        content = CONTENT_CSV;
        data = new BufferedReader(new InputStreamReader(stream));
        return this;
    }

    @Override
    public final LoaderImpl<R> loadCSV(Reader reader) {
        content = CONTENT_CSV;
        data = new BufferedReader(reader);
        return this;
    }

    @Override
    public final LoaderImpl<R> loadXML(File file) throws FileNotFoundException {
        content = CONTENT_XML;
        throw new UnsupportedOperationException("This is not yet implemented");
    }

    @Override
    public final LoaderImpl<R> loadXML(String xml) {
        content = CONTENT_XML;
        throw new UnsupportedOperationException("This is not yet implemented");
    }

    @Override
    public final LoaderImpl<R> loadXML(InputStream stream) {
        content = CONTENT_XML;
        throw new UnsupportedOperationException("This is not yet implemented");
    }

    @Override
    public final LoaderImpl<R> loadXML(Reader reader) {
        content = CONTENT_XML;
        throw new UnsupportedOperationException("This is not yet implemented");
    }

    @Override
    public final LoaderImpl<R> loadXML(InputSource source) {
        content = CONTENT_XML;
        throw new UnsupportedOperationException("This is not yet implemented");
    }

    // -------------------------------------------------------------------------
    // CSV configuration
    // -------------------------------------------------------------------------

    /**
     * Map CSV columns to table fields by position. A null entry skips the
     * column at that position. For updatable tables, this also records which
     * of the fields belong to the table's main key.
     */
    @Override
    public final LoaderImpl<R> fields(Field<?>... f) {
        this.fields = f;
        this.mainKey = new boolean[f.length];

        if (updatable != null) {
            for (int i = 0; i < fields.length; i++) {
                if (fields[i] != null) {
                    if (updatable.getMainKey().getFields().contains(fields[i])) {
                        mainKey[i] = true;
                    }
                }
            }
        }

        return this;
    }

    @Override
    public final LoaderImpl<R> fields(Collection<? extends Field<?>> f) {
        return fields(f.toArray(new Field[f.size()]));
    }

    @Override
    public final LoaderImpl<R> ignoreRows(int number) {
        ignoreRows = number;
        return this;
    }

    @Override
    public final LoaderImpl<R> quote(char q) {
        this.quote = q;
        return this;
    }

    @Override
    public final LoaderImpl<R> separator(char s) {
        this.separator = s;
        return this;
    }

    // -------------------------------------------------------------------------
    // XML configuration
    // -------------------------------------------------------------------------

    // [...] to be specified

    // -------------------------------------------------------------------------
    // Execution
    // -------------------------------------------------------------------------

    /**
     * Run the load according to the configured content type.
     *
     * @throws UnsupportedOperationException if XML content was configured
     * @throws IllegalStateException if the content type is unknown
     * @throws IOException if reading the input fails
     */
    @Override
    public final LoaderImpl<R> execute() throws IOException {
        if (content == CONTENT_CSV) {
            executeCSV();
        }
        else if (content == CONTENT_XML) {
            throw new UnsupportedOperationException();
        }
        else {
            throw new IllegalStateException();
        }

        return this;
    }

    /**
     * Read the CSV input row by row and execute one INSERT per row.
     * <p>
     * Values are assigned to fields by column position. Columns beyond the
     * configured fields, and columns mapped to a null field, are skipped.
     * SQL errors are collected in {@link #errors} rather than thrown. The
     * reader (and hence the underlying input) is always closed.
     *
     * @throws IOException if reading or parsing the CSV input fails
     */
    private void executeCSV() throws IOException {
        CSVReader reader = new CSVReader(data, separator, quote, ignoreRows);

        try {
            String[] row = null;

            // In COMMIT_AFTER mode: true while rows have been stored since the
            // last successful commit
            boolean uncommitted = false;

            // TODO: When running in COMMIT_AFTER > 1 or COMMIT_ALL mode, then
            // it might be better to bulk load / merge n records
            rowloop: while ((row = reader.readNext()) != null) {
                processed++;
                InsertQuery<R> insert = create.insertQuery(table);

                for (int i = 0; i < row.length; i++) {
                    if (i < fields.length && fields[i] != null) {
                        insert.addValue(fields[i], fields[i].getDataType().convert(row[i]));
                    }
                }

                // TODO: This is only supported by some dialects. Let other
                // dialects execute a SELECT and then either an INSERT or UPDATE
                if (onDuplicate == ON_DUPLICATE_KEY_UPDATE) {
                    insert.onDuplicateKeyUpdate(true);

                    // The key identifies the existing record. What has to be
                    // updated on a duplicate are the remaining, non-key fields
                    for (int i = 0; i < row.length; i++) {
                        if (i < fields.length && fields[i] != null && !mainKey[i]) {
                            insert.addValueForUpdate(fields[i], fields[i].getDataType().convert(row[i]));
                        }
                    }
                }

                // TODO: This can be implemented faster using a MERGE statements
                // in some dialects
                else if (onDuplicate == ON_DUPLICATE_KEY_IGNORE) {
                    SimpleSelectQuery<R> select = create.selectQuery(table);

                    for (int i = 0; i < row.length; i++) {
                        if (i < fields.length && mainKey[i]) {
                            select.addConditions(getCondition(fields[i], row[i]));
                        }
                    }

                    try {

                        // A record with the same main key exists: skip the row
                        if (select.execute() > 0) {
                            ignored++;
                            continue rowloop;
                        }
                    }
                    catch (SQLException e) {

                        // The lookup failure is recorded, the INSERT is still
                        // attempted below
                        errors.add(new LoaderErrorImpl(e, row, processed - 1, select));
                    }
                }

                // ON_DUPLICATE_KEY_ERROR: Don't do anything. Let the execution
                // fail

                try {
                    insert.execute();
                    stored++;

                    if (commit == COMMIT_AFTER) {
                        uncommitted = true;

                        if (processed % commitAfter == 0) {
                            create.getConnection().commit();
                            uncommitted = false;
                        }
                    }
                }
                catch (SQLException e) {
                    errors.add(new LoaderErrorImpl(e, row, processed - 1, insert));
                    ignored++;

                    if (onError == ON_ERROR_ABORT) {
                        break rowloop;
                    }
                }
            }

            try {

                // Rollback on errors in COMMIT_ALL mode
                if (commit == COMMIT_ALL) {
                    if (!errors.isEmpty()) {
                        create.getConnection().rollback();
                    }
                    else {
                        create.getConnection().commit();
                    }
                }

                // Commit remaining elements in COMMIT_AFTER mode. The flag
                // covers stored rows whose commit boundary was hit by a row
                // that failed or was skipped, in which case the modulo check
                // alone would leave them uncommitted
                else if (commit == COMMIT_AFTER) {
                    if (uncommitted || processed % commitAfter != 0) {
                        create.getConnection().commit();
                    }
                }
            }
            catch (SQLException e) {

                // Not related to any particular row or query
                errors.add(new LoaderErrorImpl(e, null, processed - 1, null));
            }
        }
        finally {
            reader.close();
        }
    }

    /**
     * Get a type-safe condition comparing a field with a CSV value converted
     * to the field's data type
     */
    private <T> Condition getCondition(Field<T> field, String string) {
        return field.equal(field.getDataType().convert(string));
    }

    // -------------------------------------------------------------------------
    // Outcome
    // -------------------------------------------------------------------------

    /**
     * @return The live list of errors collected so far (not a copy)
     */
    @Override
    public final List<LoaderError> errors() {
        return errors;
    }

    /**
     * @return The number of rows read from the input
     */
    @Override
    public final int processed() {
        return processed;
    }

    /**
     * @return The number of rows skipped as duplicates or because their INSERT
     *         failed
     */
    @Override
    public final int ignored() {
        return ignored;
    }

    /**
     * @return The number of rows whose INSERT was executed successfully
     */
    @Override
    public final int stored() {
        return stored;
    }
}

View File

@ -0,0 +1,366 @@
/**
* Copyright 2005 Bytecode Pty Ltd.
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq.tools.csv;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * A very simple CSV parser released under a commercial-friendly license. This
 * just implements splitting a single line into fields.
 * <p>
 * A parser instance keeps state between calls (an unterminated quoted field is
 * carried over to the next call of {@link #parseLineMulti(String)}), so one
 * instance should only be used for one input at a time.
 *
 * @author Glen Smith
 * @author Rainer Pruy
 */
public class CSVParser {

    private final char          separator;
    private final char          quotechar;
    private final char          escape;
    private final boolean       strictQuotes;
    private final boolean       ignoreLeadingWhiteSpace;

    /**
     * The content of a quoted field that was still open at the end of the
     * previously parsed line, or null if there is none.
     */
    private String              pending;

    /**
     * Whether the parser is currently inside a field. Only carried over from
     * one line to the next while a quoted field is pending.
     */
    private boolean             inField                           = false;

    /**
     * The default separator to use if none is supplied to the constructor.
     */
    public static final char    DEFAULT_SEPARATOR                 = ',';

    /**
     * The initial capacity of the buffer used to collect a field's characters.
     */
    public static final int     INITIAL_READ_SIZE                 = 128;

    /**
     * The default quote character to use if none is supplied to the
     * constructor.
     */
    public static final char    DEFAULT_QUOTE_CHARACTER           = '"';

    /**
     * The default escape character to use if none is supplied to the
     * constructor.
     */
    public static final char    DEFAULT_ESCAPE_CHARACTER          = '\\';

    /**
     * The default strict quote behavior to use if none is supplied to the
     * constructor
     */
    public static final boolean DEFAULT_STRICT_QUOTES             = false;

    /**
     * The default leading whitespace behavior to use if none is supplied to the
     * constructor
     */
    public static final boolean DEFAULT_IGNORE_LEADING_WHITESPACE = true;

    /**
     * This is the "null" character - if a value is set to this then it is
     * ignored. I.E. if the quote character is set to null then there is no
     * quote character.
     */
    public static final char    NULL_CHARACTER                    = '\0';

    /**
     * Constructs CSVParser using a comma for the separator.
     */
    public CSVParser() {
        this(DEFAULT_SEPARATOR, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator.
     *
     * @param separator the delimiter to use for separating entries.
     */
    public CSVParser(char separator) {
        this(separator, DEFAULT_QUOTE_CHARACTER, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator and quote char.
     *
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     */
    public CSVParser(char separator, char quotechar) {
        this(separator, quotechar, DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVParser with supplied separator, quote char and escape
     * char.
     *
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape the character to use for escaping a separator or quote
     */
    public CSVParser(char separator, char quotechar, char escape) {
        this(separator, quotechar, escape, DEFAULT_STRICT_QUOTES);
    }

    /**
     * Constructs CSVParser with supplied separator and quote char. Allows
     * setting the "strict quotes" flag
     *
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape the character to use for escaping a separator or quote
     * @param strictQuotes if true, characters outside the quotes are ignored
     */
    public CSVParser(char separator, char quotechar, char escape, boolean strictQuotes) {
        this(separator, quotechar, escape, strictQuotes, DEFAULT_IGNORE_LEADING_WHITESPACE);
    }

    /**
     * Constructs CSVParser with supplied separator and quote char. Allows
     * setting the "strict quotes" and "ignore leading whitespace" flags
     *
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape the character to use for escaping a separator or quote
     * @param strictQuotes if true, characters outside the quotes are ignored
     * @param ignoreLeadingWhiteSpace if true, white space in front of a quote
     *            in a field is ignored
     * @throws UnsupportedOperationException if two of separator, quote and
     *             escape are the same non-null character, or if the separator
     *             is the {@link #NULL_CHARACTER}
     */
    public CSVParser(char separator, char quotechar, char escape, boolean strictQuotes, boolean ignoreLeadingWhiteSpace) {
        if (anyCharactersAreTheSame(separator, quotechar, escape)) {
            throw new UnsupportedOperationException("The separator, quote, and escape characters must be different!");
        }

        if (separator == NULL_CHARACTER) {
            throw new UnsupportedOperationException("The separator character must be defined!");
        }

        this.separator = separator;
        this.quotechar = quotechar;
        this.escape = escape;
        this.strictQuotes = strictQuotes;
        this.ignoreLeadingWhiteSpace = ignoreLeadingWhiteSpace;
    }

    private static boolean anyCharactersAreTheSame(char separator, char quotechar, char escape) {
        return isSameCharacter(separator, quotechar) || isSameCharacter(separator, escape)
            || isSameCharacter(quotechar, escape);
    }

    /**
     * @return true if both characters are equal and not the
     *         {@link #NULL_CHARACTER}
     */
    private static boolean isSameCharacter(char c1, char c2) {
        return c1 != NULL_CHARACTER && c1 == c2;
    }

    /**
     * @return true if something was left over from last call(s)
     */
    public boolean isPending() {
        return pending != null;
    }

    /**
     * Parses one line of a possibly multi-line record. If the line ends inside
     * a quoted field, the field's content so far is kept as pending and
     * continued by the next call.
     *
     * @param nextLine the string to parse
     * @return the elements completed on this line. If <code>nextLine</code> is
     *         null, the pending content as a single element, or null if
     *         nothing is pending
     * @throws IOException declared for symmetry with
     *             {@link #parseLine(String)}
     */
    public String[] parseLineMulti(String nextLine) throws IOException {
        return parseLine(nextLine, true);
    }

    /**
     * Parses one self-contained line. Any content pending from a previous
     * multi-line call is discarded.
     *
     * @param nextLine the string to parse
     * @return the elements of the line, or null if <code>nextLine</code> is
     *         null
     * @throws IOException if the line ends inside a quoted field
     */
    public String[] parseLine(String nextLine) throws IOException {
        return parseLine(nextLine, false);
    }

    /**
     * Parses an incoming String and returns an array of elements.
     *
     * @param nextLine the string to parse
     * @param multi whether a quoted field that is still open at the end of the
     *            line is continued by the next call (true) or is an error
     *            (false)
     * @return the tokenized list of elements. If nextLine is null, the pending
     *         content as a single element, or null if nothing is pending
     * @throws IOException if the line ends inside a quoted field and
     *             <code>multi</code> is false
     */
    private String[] parseLine(String nextLine, boolean multi) throws IOException {

        // A single-line parse never continues a previous line
        if (!multi && pending != null) {
            pending = null;
        }

        if (nextLine == null) {
            if (pending != null) {
                String s = pending;
                pending = null;
                return new String[] { s };
            }
            else {
                return null;
            }
        }

        List<String> tokensOnThisLine = new ArrayList<String>();
        StringBuilder sb = new StringBuilder(INITIAL_READ_SIZE);
        boolean inQuotes = false;

        if (pending != null) {

            // Continue the quoted field that was left open by the last line
            sb.append(pending);
            pending = null;
            inQuotes = true;
        }
        else {

            // A new record starts outside of any field. Without this reset,
            // the state left behind by the previous line would make a leading
            // empty quoted field ("") look like an escaped quote character
            inField = false;
        }

        for (int i = 0; i < nextLine.length(); i++) {
            char c = nextLine.charAt(i);

            if (c == this.escape) {

                // An escape character that does not escape anything is dropped
                if (isNextCharacterEscapable(nextLine, inQuotes || inField, i)) {
                    sb.append(nextLine.charAt(i + 1));
                    i++;
                }
            }
            else if (c == quotechar) {
                if (isNextCharacterEscapedQuote(nextLine, inQuotes || inField, i)) {
                    sb.append(nextLine.charAt(i + 1));
                    i++;
                }
                else {

                    // the tricky case of an embedded quote in the middle:
                    // a,bc"d"ef,g
                    if (!strictQuotes) {

                        // NOTE(review): "i > 2" also excludes quotes at
                        // indexes 1 and 2 - confirm this is intended
                        if (i > 2 // not on the beginning of the line

                            // not at the beginning of an escape sequence
                            && nextLine.charAt(i - 1) != this.separator

                            // not at the end of an escape sequence
                            && nextLine.length() > (i + 1) && nextLine.charAt(i + 1) != this.separator) {

                            if (ignoreLeadingWhiteSpace && sb.length() > 0 && isAllWhiteSpace(sb)) {

                                // discard white space leading up to quote
                                sb.setLength(0);
                            }
                            else {
                                sb.append(c);
                            }
                        }
                    }

                    inQuotes = !inQuotes;
                }

                inField = !inField;
            }
            else if (c == separator && !inQuotes) {
                tokensOnThisLine.add(sb.toString());
                sb.setLength(0); // start work on next token
                inField = false;
            }
            else {
                if (!strictQuotes || inQuotes) {
                    sb.append(c);
                    inField = true;
                }
            }
        }

        // line is done - check status
        if (inQuotes) {
            if (multi) {

                // continuing a quoted section, re-append newline
                sb.append("\n");
                pending = sb.toString();

                // this partial content is not to be added to field list yet
                sb = null;
            }
            else {
                throw new IOException("Un-terminated quoted field at end of CSV line");
            }
        }

        if (sb != null) {
            tokensOnThisLine.add(sb.toString());
        }

        return tokensOnThisLine.toArray(new String[tokensOnThisLine.size()]);
    }

    /**
     * precondition: the current character is a quote or an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i current index in line
     * @return true if the following character is a quote
     */
    private boolean isNextCharacterEscapedQuote(String nextLine, boolean inQuotes, int i) {

        // we are in quotes, therefore there can be escaped quotes in here.
        return inQuotes

            // there is indeed another character to check.
            && nextLine.length() > (i + 1)
            && nextLine.charAt(i + 1) == quotechar;
    }

    /**
     * precondition: the current character is an escape
     *
     * @param nextLine the current line
     * @param inQuotes true if the current context is quoted
     * @param i current index in line
     * @return true if the following character is a quote or an escape
     *         character
     */
    protected boolean isNextCharacterEscapable(String nextLine, boolean inQuotes, int i) {

        // we are in quotes, therefore there can be escaped quotes in here.
        return inQuotes

            // there is indeed another character to check.
            && nextLine.length() > (i + 1)
            && (nextLine.charAt(i + 1) == quotechar || nextLine.charAt(i + 1) == this.escape);
    }

    /**
     * precondition: sb.length() > 0
     *
     * @param sb A sequence of characters to examine
     * @return true if every character in the sequence is whitespace
     */
    protected boolean isAllWhiteSpace(CharSequence sb) {
        for (int i = 0; i < sb.length(); i++) {
            if (!Character.isWhitespace(sb.charAt(i))) {
                return false;
            }
        }

        return true;
    }
}

View File

@ -0,0 +1,260 @@
/**
* Copyright 2005 Bytecode Pty Ltd.
* Copyright (c) 2009-2011, Lukas Eder, lukas.eder@gmail.com
* All rights reserved.
*
* This software is licensed to you under the Apache License, Version 2.0
* (the "License"); You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* . Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* . Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* . Neither the name "jOOQ" nor the names of its contributors may be
* used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.jooq.tools.csv;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
/**
 * A very simple CSV reader released under a commercial-friendly license.
 * <p>
 * Lines are read from the underlying reader one at a time and handed to a
 * {@link CSVParser}. A record whose quoted field spans several lines is
 * assembled from all of those lines.
 *
 * @author Glen Smith
 */
public class CSVReader implements Closeable {

    private final BufferedReader br;
    private final CSVParser      parser;

    /** The number of leading lines to discard before the first record. */
    private final int            skipLines;

    /** Becomes false once the underlying reader has reported end of input. */
    private boolean              hasNext            = true;

    /** Whether the leading lines have already been discarded. */
    private boolean              linesSkipped;

    /**
     * The default line to start reading.
     */
    public static final int      DEFAULT_SKIP_LINES = 0;

    /**
     * Constructs CSVReader using a comma for the separator.
     *
     * @param reader the reader to an underlying CSV source.
     */
    public CSVReader(Reader reader) {
        this(reader, CSVParser.DEFAULT_SEPARATOR, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVReader with supplied separator.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries.
     */
    public CSVReader(Reader reader, char separator) {
        this(reader, separator, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER);
    }

    /**
     * Constructs CSVReader with supplied separator and quote char.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     */
    public CSVReader(Reader reader, char separator, char quotechar) {
        this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES,
            CSVParser.DEFAULT_STRICT_QUOTES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and quote
     * handling behavior.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param strictQuotes sets if characters outside the quotes are ignored
     */
    public CSVReader(Reader reader, char separator, char quotechar, boolean strictQuotes) {
        this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, strictQuotes);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and escape
     * char.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape the character to use for escaping a separator or quote
     */
    public CSVReader(Reader reader, char separator, char quotechar, char escape) {
        this(reader, separator, quotechar, escape, DEFAULT_SKIP_LINES, CSVParser.DEFAULT_STRICT_QUOTES);
    }

    /**
     * Constructs CSVReader with supplied separator and quote char, skipping a
     * number of leading lines.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param line the number of leading lines to skip before reading
     */
    public CSVReader(Reader reader, char separator, char quotechar, int line) {
        this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, line, CSVParser.DEFAULT_STRICT_QUOTES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char and escape
     * char, skipping a number of leading lines.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape the character to use for escaping a separator or quote
     * @param line the number of leading lines to skip before reading
     */
    public CSVReader(Reader reader, char separator, char quotechar, char escape, int line) {
        this(reader, separator, quotechar, escape, line, CSVParser.DEFAULT_STRICT_QUOTES);
    }

    /**
     * Constructs CSVReader with supplied separator, quote char, escape char
     * and quote handling behavior, skipping a number of leading lines.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape the character to use for escaping a separator or quote
     * @param line the number of leading lines to skip before reading
     * @param strictQuotes sets if characters outside the quotes are ignored
     */
    public CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes) {
        this(reader, separator, quotechar, escape, line, strictQuotes, CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE);
    }

    /**
     * Constructs CSVReader with all options supplied.
     *
     * @param reader the reader to an underlying CSV source.
     * @param separator the delimiter to use for separating entries
     * @param quotechar the character to use for quoted elements
     * @param escape the character to use for escaping a separator or quote
     * @param line the number of leading lines to skip before reading
     * @param strictQuotes sets if characters outside the quotes are ignored
     * @param ignoreLeadingWhiteSpace if true, parser should ignore white space
     *            before a quote in a field
     */
    public CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes,
        boolean ignoreLeadingWhiteSpace) {

        // Don't stack a second buffer on top of an already buffered reader
        this.br = (reader instanceof BufferedReader) ? (BufferedReader) reader : new BufferedReader(reader);
        this.parser = new CSVParser(separator, quotechar, escape, strictQuotes, ignoreLeadingWhiteSpace);
        this.skipLines = line;
    }

    /**
     * Reads the entire file into a List with each element being a String[] of
     * tokens.
     *
     * @return a List of String[], with each String[] representing a line of the
     *         file.
     * @throws IOException if bad things happen during the read
     */
    public List<String[]> readAll() throws IOException {
        List<String[]> allElements = new ArrayList<String[]>();

        while (hasNext) {
            String[] nextLineAsTokens = readNext();

            if (nextLineAsTokens != null) {
                allElements.add(nextLineAsTokens);
            }
        }

        return allElements;
    }

    /**
     * Reads the next record from the buffer and converts it to a string array.
     * A record spans several lines if a quoted field contains line breaks.
     * <p>
     * If the input ends inside a quoted field, the content collected for that
     * field so far (including the newline the parser re-appends after every
     * line) is returned as the record's last element instead of being dropped.
     *
     * @return a string array with each separated element as a separate entry,
     *         or null if the end of the input has been reached
     * @throws IOException if bad things happen during the read
     */
    public String[] readNext() throws IOException {
        String[] result = null;

        do {
            String nextLine = getNextLine();

            if (!hasNext) {

                // End of input inside a quoted field: passing null makes the
                // parser hand out whatever it is still holding back
                if (parser.isPending()) {
                    result = concat(result, parser.parseLineMulti(null));
                }

                return result;
            }

            String[] r = parser.parseLineMulti(nextLine);

            if (r.length > 0) {
                result = concat(result, r);
            }
        }
        while (parser.isPending());

        return result;
    }

    /**
     * Appends <code>tail</code> to <code>head</code>.
     *
     * @param head the elements collected so far, or null if there are none
     * @param tail the elements to append
     * @return <code>tail</code> itself if <code>head</code> is null, or a new
     *         array holding the elements of both arrays otherwise
     */
    private static String[] concat(String[] head, String[] tail) {
        if (head == null) {
            return tail;
        }

        String[] joined = new String[head.length + tail.length];
        System.arraycopy(head, 0, joined, 0, head.length);
        System.arraycopy(tail, 0, joined, head.length, tail.length);
        return joined;
    }

    /**
     * Reads the next line from the file. On the first call, the configured
     * number of leading lines is read and discarded first.
     *
     * @return the next line from the file without trailing newline, or null if
     *         the end of the input has been reached
     * @throws IOException if bad things happen during the read
     */
    private String getNextLine() throws IOException {
        if (!this.linesSkipped) {
            for (int i = 0; i < skipLines; i++) {
                br.readLine();
            }

            this.linesSkipped = true;
        }

        String nextLine = br.readLine();

        if (nextLine == null) {
            hasNext = false;
        }

        return hasNext ? nextLine : null;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if the close fails
     */
    @Override
    public void close() throws IOException {
        br.close();
    }
}