Difference between revisions of "Fix ARTigo Dating"
From DHVLab
Wiki admin (talk | contribs) |
Wiki admin (talk | contribs) |
||
(One intermediate revision by the same user not shown) | |||
Line 1: | Line 1: | ||
When accessing the data or creating plots you could for example use: | When accessing the data or creating plots you could for example use: | ||
− | < | + | <syntaxhighlight lang="sql">AVG(date_min,date_max) AS dating</syntaxhighlight> |
− | < | + | Warning: Extremely unoptimized, slow code :-) |
+ | <syntaxhighlight lang="java"> | ||
import java.sql.Connection; | import java.sql.Connection; | ||
import java.sql.DriverManager; | import java.sql.DriverManager; | ||
Line 180: | Line 181: | ||
} | } | ||
− | </ | + | </syntaxhighlight> |
Latest revision as of 10:10, 20 May 2016
When accessing the data or creating plots you could for example use:
AVG(date_min,date_max) AS dating
Warning: Extremely unoptimized, slow code :-)
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/* Quick fuzzy fix to convert dating strings in the ARTigo
* database to date_min, date_max and an indicator about the
* connection between both numbers.
* Jh., century etc information is parsed and translated, as well
* as ranges, estimates,....
*
* (c) Linus Kohl, kohl@munichtrading.com
*/
public class MySQL {
static final String JDBC_DRIVER = "com.mysql.jdbc.Driver";
static final String DB_URL = "jdbc:mysql://XX/artigo";
static final String USER = "XXX";
static final String PASS = "XXX";
private static Connection connect = null;
private static Statement query = null;
private static ResultSet resultSet = null;
public static void readDataBase() throws Exception {
try {
// load jdbc driver
Class.forName(JDBC_DRIVER);
// connect to database
connect = DriverManager.getConnection(DB_URL, USER, PASS);
query = connect.createStatement();
// select all artworks where date are not set
// get row by row
PreparedStatement query = connect.prepareStatement(
"select * from sum_artworks where date_min != \"\"",
ResultSet.TYPE_FORWARD_ONLY,
ResultSet.CONCUR_UPDATABLE);
query.setFetchSize(Integer.MIN_VALUE);
// execute query
resultSet = query.executeQuery();
// loop over result
loopResultSet(resultSet);
} catch (Exception e) {
throw e;
} finally {
close();
}
}
/* Extract all integers from a string
* and return as array */
public static List<Integer> extractInt(String str) {
List<Integer> numbers = new ArrayList<Integer>();
Matcher matcher = Pattern.compile("\\d+").matcher(str);
while (matcher.find()) {
numbers.add(Integer.parseInt(matcher.group()));
}
return numbers;
}
/* Loop over rows and and try to fix the date */
private static void loopResultSet(ResultSet resultSet) throws SQLException {
while (resultSet.next()) {
// min dating
int date_min = 0;
// max dating
int date_max = 0;
// store -,/ or ~ for fuzzy input
String operator = "";
// get the dating string
String dating = resultSet.getString("dating");
if (dating != null) {
// transform to lowercase
dating = dating.toLowerCase();
try {
// parse for integers in string
List<Integer> dates = extractInt(dating);
// check if string contains century information
if (dating.contains("jh") ||
dating.contains("jhd") ||
dating.contains("century") ||
dating.contains("ct.") ||
dating.contains("th") ||
dating.contains("jahrhundert")) {
// get largest integer between 1 and 20
for (int i = 0; i < dates.size(); i++) {
if(dates.get(i) >= 1 &&
dates.get(i)<= 20) {
// date_max = integer * 100 and date_min 100 years earlier
date_max = dates.get(i)*100;
date_min = date_max-100;
}
}
} else {
// no century information found
switch (dates.size()) {
// no integer found
case 0: {
break;
}
case 1: {
// one integer found, date_min = date_max
operator = "=";
date_min = date_max = dates.get(0);
break;
}
case 2: {
// set indicator if the dating is before the specified date
if (dating.contains("before") ||
dating.contains("vor"))
operator = "<";
// set indicator for /
if (dating.contains("/"))
operator = "/";
// set indicator for -
if (dating.contains("-"))
operator = "-";
// set indicator for fuzzy specification
if (dating.contains("ca.") ||
dating.contains("um "))
operator = "~";
// set the dates
date_min = dates.get(0);
date_max = dates.get(1);
// fix for special case inputs like 1933/34
if(date_min >= 100 && date_max <= 100) {
date_max = date_min+date_max;
}
break;
}
}
}
System.out.println("Dating: " + dating + " " + date_min +
" " + date_max + " " + operator);
// set the dates
resultSet.updateInt("date_min", date_min);
resultSet.updateInt("date_max", date_max);
resultSet.updateString("date_info", operator);
// update the db entry
resultSet.updateRow();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
/* close the connection, query and statements */
private static void close() {
try {
if (resultSet != null) {
resultSet.close();
}
if (query != null) {
query.close();
}
if (connect != null) {
connect.close();
}
} catch (Exception e) {
}
}
public static void main(String args[]) throws Exception {
readDataBase();
}
}