Trying to load Apache Tomcat logs with multiple lines for single row , but its only loading single line and showing NULL values for rest of lines until it reaches next record .
I have tried regex from earlier post but they are not working , the same regular expression is working in testing tools here is the link for one of them : http://rubular.com/r/nVrWhuwg1c
It is properly recognizing the lines and seperating them in correct groups but the same code when i am trying in hive its not working .
Sample Record :
12-Dec-2013 12:03:26.988 WARNING [localhost-startStop-1] org.apache.tomcat.util.scan.StandardJarScanner.scan Failed to scan [file:/usr/share/java/jsp-api-2.3.jar] from classloader hierarchy
java.io.FileNotFoundException: /usr/share/java/jsp-api-2.3.jar (No such file or directory)
at java.util.zip.ZipFile.open(Native Method)
at java.util.zip.ZipFile.<init>(ZipFile.java:225)
at java.util.zip.ZipFile.<init>(ZipFile.java:155)
at java.util.jar.JarFile.<init>(JarFile.java:166)
at java.util.jar.JarFile.<init>(JarFile.java:130)
at org.apache.tomcat.util.scan.JarFileUrlJar.<init>(JarFileUrlJar.java:60)
at org.apache.tomcat.util.scan.JarFactory.newInstance(JarFactory.java:49)
at org.apache.tomcat.util.scan.StandardJarScanner.process(StandardJarScanner.java:338)
at org.apache.tomcat.util.scan.StandardJarScanner.scan(StandardJarScanner.java:288)
at org.apache.catalina.startup.ContextConfig.processJarsForWebFragments(ContextConfig.java:1898)
at org.apache.catalina.startup.ContextConfig.webConfig(ContextConfig.java:1126)
at org.apache.catalina.startup.ContextConfig.configureStart(ContextConfig.java:775)
at org.apache.catalina.startup.ContextConfig.lifecycleEvent(ContextConfig.java:299)
at org.apache.catalina.util.LifecycleBase.fireLifecycleEvent(LifecycleBase.java:94)
at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5105)
at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:150)
at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:752)
at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:728)
at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:734)
at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:596)
at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1805)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
13-Dec-2016 12:03:24.988 WARNING [localhost-startStop-1] org.apache.tomcat.util.scan.StandardJarScanner.scan Failed to scan [file:/usr/share/java/el-api-3.0.jar] from classloader hierarchy
java.io.FileNotFoundException: /usr/share/java/el-api-3.0.jar (No such file or directory)
at java.util.zip.ZipFile.open(Native Method)
at java.util.zip.ZipFile.<init>(ZipFile.java:225)
at java.util.zip.ZipFile.<init>(ZipFile.java:155)
at java.util.jar.JarFile.<init>(JarFile.java:166)
at java.util.jar.JarFile.<init>(JarFile.java:130)
at org.apache.tomcat.util.scan.JarFileUrlJar.<init>(JarFileUrlJar.java:60)
at org.apache.tomcat.util.scan.JarFactory.newInstance(JarFactory.java:49)
at org.apache.tomcat.util.scan.StandardJarScanner.process(StandardJarScanner.java:338)
at org.apache.tomcat.util.scan.StandardJarScanner.scan(StandardJarScanner.java:288)
at org.apache.jasper.servlet.TldScanner.scanJars(TldScanner.java:262)
at org.apache.jasper.servlet.TldScanner.scan(TldScanner.java:104)
at org.apache.jasper.servlet.JasperInitializer.onStartup(JasperInitializer.java:101)
at org.apache.catalina.core.StandardContext.startInternal(StandardContext.java:5196)
at org.apache.catalina.util.LifecycleBase.start(LifecycleBase.java:150)
at org.apache.catalina.core.ContainerBase.addChildInternal(ContainerBase.java:752)
at org.apache.catalina.core.ContainerBase.addChild(ContainerBase.java:728)
at org.apache.catalina.core.StandardHost.addChild(StandardHost.java:734)
at org.apache.catalina.startup.HostConfig.deployDescriptor(HostConfig.java:596)
at org.apache.catalina.startup.HostConfig$DeployDescriptor.run(HostConfig.java:1805)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:748)
Create Table Query which i am trying
CREATE EXTERNAL TABLE apache_cat_log_test
(
logdatetime STRING,
logtype STRING,
requestid STRING,
verbosedata STRING,
msg string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
"input.regex" = "^(\\d{2}-\\w{3}-\\d{4})\\s+(.*?)\\s+(\\w+)\\s*(.*?)\\s*$((?:\\s*(?!\\d{2}).*?$)*)"
)
STORED AS TEXTFILE
location '/catlogs';