
An introduction to integrating Spark into Spring Boot and using Spark SQL


First, add the required dependencies:


<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>1.5.6.RELEASE</version>
    <relativePath />
  </parent>
  <groupId>com.cord</groupId>
  <artifactId>spark-example</artifactId>
  <version>1.0-SNAPSHOT</version>
  <name>spark-example</name>
  <!-- FIXME change it to the project's website -->
  <url>http://www.example.com</url>
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    <java.version>1.8</java.version>
    <scala.version>2.10.3</scala.version>
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>
  <dependencies>
    <dependency>
      <groupId>org.springframework.boot</groupId>
      <artifactId>spring-boot-starter</artifactId>
      <version>1.5.6.RELEASE</version>
      <exclusions>
        <exclusion>
          <groupId>org.springframework.boot</groupId>
          <artifactId>spring-boot-starter-logging</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>1.6.1</version>
      <scope>provided</scope>
      <exclusions>
        <exclusion>
          <groupId>org.slf4j</groupId>
          <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
        <exclusion>
          <groupId>log4j</groupId>
          <artifactId>log4j</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-sql_2.10</artifactId>
      <version>1.6.1</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-hive_2.10</artifactId>
      <version>1.6.1</version>
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.scala-lang</groupId>
      <artifactId>scala-library</artifactId>
      <version>${scala.version}</version>
      <scope>provided</scope>
    </dependency>
    <!-- yarn-cluster mode -->
    <dependency>
      <groupId>mysql</groupId>
      <artifactId>mysql-connector-java</artifactId>
      <version>5.1.22</version>
    </dependency>
  </dependencies>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <dependencies>
          <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-maven-plugin</artifactId>
            <version>1.5.6.RELEASE</version>
          </dependency>
        </dependencies>
        <configuration>
          <keepDependenciesWithProvidedScope>false</keepDependenciesWithProvidedScope>
          <createDependencyReducedPom>false</createDependencyReducedPom>
          <filters>
            <filter>
              <artifact>*:*</artifact>
              <excludes>
                <exclude>META-INF/*.SF</exclude>
                <exclude>META-INF/*.DSA</exclude>
                <exclude>META-INF/*.RSA</exclude>
              </excludes>
            </filter>
          </filters>
          <transformers>
            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
              <resource>META-INF/spring.handlers</resource>
            </transformer>
            <transformer implementation="org.springframework.boot.maven.PropertiesMergingResourceTransformer">
              <resource>META-INF/spring.factories</resource>
            </transformer>
            <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
              <resource>META-INF/spring.schemas</resource>
            </transformer>
            <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
            <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
              <mainClass>com.cord.StartApplication</mainClass>
            </transformer>
          </transformers>
        </configuration>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

Note the logging modules that are excluded from the dependencies above, as well as the special packaging setup (the maven-shade-plugin configuration with the Spring Boot resource transformers).
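
Because logback is stripped from the Boot starter and slf4j-log4j12/log4j are excluded from spark-core, the SLF4J binding actually used at runtime comes from whatever the cluster (or your local classpath) provides. If you want to confirm which binding ended up on the classpath, a tiny check like the following works; this is only an illustrative sketch, not part of the original article:

import org.slf4j.LoggerFactory;

public class LoggingBindingCheck {
    public static void main(String[] args) {
        // Prints the concrete ILoggerFactory implementation, which reveals the
        // SLF4J binding present on the classpath (e.g. a log4j binding supplied
        // by the cluster, or logback if it was not excluded after all).
        System.out.println(LoggerFactory.getILoggerFactory().getClass().getName());
    }
}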

Define a configuration class:

SparkContextBean.class


import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.hive.HiveContext;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

@Configuration
public class SparkContextBean {

    private String appName = "sparkExp";
    private String master = "local";

    // Spark configuration: application name and master URL (local mode by default)
    @Bean
    @ConditionalOnMissingBean(SparkConf.class)
    public SparkConf sparkConf() throws Exception {
        SparkConf conf = new SparkConf().setAppName(appName).setMaster(master);
        return conf;
    }

    @Bean
    @ConditionalOnMissingBean
    public JavaSparkContext javaSparkContext() throws Exception {
        return new JavaSparkContext(sparkConf());
    }

    @Bean
    @ConditionalOnMissingBean
    public HiveContext hiveContext() throws Exception {
        return new HiveContext(javaSparkContext());
    }

    ......
}
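
The elided part of the class can hold whatever further beans the application needs. Purely as an illustrative sketch (not from the original post), a plain SQLContext for non-Hive queries could be wired the same way; the class name SqlContextBean and the injected JavaSparkContext parameter are my own choices:

import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.SQLContext;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

// Illustrative sketch only -- not part of the original article.
@Configuration
public class SqlContextBean {

    @Bean
    @ConditionalOnMissingBean
    public SQLContext sqlContext(JavaSparkContext jsc) throws Exception {
        // A plain SQLContext (no Hive metastore) built on the JavaSparkContext
        // bean defined in SparkContextBean; useful when Hive is not required.
        return new SQLContext(jsc);
    }
}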

The startup class:

StartApplication.class


import java.util.List;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.hive.HiveContext;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.CommandLineRunner;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

@SpringBootApplication
public class StartApplication implements CommandLineRunner {

    @Autowired
    private HiveContext hc;

    public static void main(String[] args) {
        SpringApplication.run(StartApplication.class, args);
    }

    @Override
    public void run(String... args) throws Exception {
        // Run a Hive query through the injected HiveContext and print the result
        DataFrame df = hc.sql("select count(1) from LCS_DB.STAFF_INFO");
        List<Long> result = df.javaRDD()
                .map((Function<Row, Long>) row -> row.getLong(0))
                .collect();
        result.stream().forEach(System.out::println);
    }
}
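
As a side note (not in the original post), the same count can also be expressed with the DataFrame API instead of a SQL string. A minimal alternative run() body, assuming the same LCS_DB.STAFF_INFO Hive table:

@Override
public void run(String... args) throws Exception {
    // Illustrative variant: same count via the DataFrame API (Spark 1.6)
    // instead of hc.sql("select count(1) ...").
    long staffCount = hc.table("LCS_DB.STAFF_INFO").count();
    System.out.println(staffCount);
}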

How to run it:


spark-submit \
    --class com.cord.StartApplication \
    --executor-memory 4G \
    --num-executors 8 \
    --master yarn-client \
    /data/cord/spark-example-1.0-SNAPSHOT.jar

References:

https://stackoverflow.com/questions/45189701/submitting-spring-boot-application-jar-to-spark-submit

https://my.oschina.net/woter/blog/1843755

Original article: https://www.cnblogs.com/cord/p/9530404.html