Tuesday, 18 August 2015

How to use Java 7 Pattern & Matcher to extract values from a String?

    public static void main(String[] args) {
        String txt = "\"199.47.181.213\" \"NULL-AUTH-USER\" \"06/Oct/2014:11:19:54 +0000\" \"GET /site/\" 'HTTP/1.0\" 200 20668 ";
        String rgx = "\"(.*)\" \"(.*)\" \"(.*)\" \"(.*)\" ([0-9]+) ([0-9]+) ";
        SimpleDateFormat dateFormat = new SimpleDateFormat("dd/MMM/yyyy:hh:mm:ss +SSSS");
        Pattern p = Pattern.compile(rgx);
        Matcher m = p.matcher(txt);
        boolean b = m.matches();
        if (b) {
            int groupCount = m.groupCount();
            for (int i = 0; i <= groupCount; i++) {
                String g = m.group(i);
                System.out.print("matched group " + i + ":\t");
                if (i == 3) {
                    Date parsedDate;
                    try {
                        parsedDate = dateFormat.parse(g);
                        Timestamp timestamp = new java.sql.Timestamp(parsedDate.getTime());
                        System.out.println(timestamp);
                    } catch (ParseException ex) {
                        Logger.getLogger(PatternTester.class.getName()).log(Level.SEVERE, g, ex);
                    }
                } else {
                    System.out.println(g);
                }
            }
        } else {
            System.out.println(txt + "\nDOES NOT MATCH\n" + rgx);
        }
    }

Below is the output:

matched group 0: "199.47.181.213" "NULL-AUTH-USER" "06/Oct/2014:11:19:54 +0000" "GET /site/" 'HTTP/1.0" 200 20668
matched group 1: 199.47.181.213
matched group 2: NULL-AUTH-USER
matched group 3: 2014-10-06 11:19:54.0
matched group 4: GET /site/" 'HTTP/1.0
matched group 5: 200
matched group 6: 20668